# -*- coding: utf-8 -*-
import asyncio
import json
import os
import random
import shutil
import time
import uuid
from urllib.parse import urlparse, parse_qs

import requests
from pyppeteer import launch
from pyppeteer.network_manager import Response, Request
from zc_core.middlewares.agents.user_agents import agents
from zc_core.middlewares.proxies.proxy_facade import ProxyFacade

from ctaxccgp.simple.simple_dao import SimpleDao
from ctaxccgp.simple.slider_util import slide_move
from ctaxccgp.simple import cache_util as cache

# Proxy address shared by this module; empty string means direct connection.
proxy = ''
proxy_facade = ProxyFacade()
# Static resource cache — not referenced in this module's visible code;
# presumably filled/read elsewhere (TODO confirm against callers).
static_cache = {}


class ItemCrawler(object):
    """Crawl the deal-record pages of one item on ctaxccgp.zcygov.cn.

    A pyppeteer-driven browser opens the item detail page, switches to the
    deal-record tab and steps through ``total_page`` pages of the paginator.
    Request/response interceptors replay the deal-record API call through a
    plain ``requests`` session so a successful JSON payload can be cached
    and served locally on subsequent pages.
    """

    # NOTE(review): class attribute — the uuid is fixed at import time, so
    # every instance shares the same profile directory.  Windows-specific
    # path; confirm this is intended before running elsewhere.
    tmp_dir = 'E:/TempData/{}'.format(str(uuid.uuid4()))
    # Consecutive "bind" (HTTP 400-coded) API responses since the last reload.
    bind_count = 0
    # Reload the detail page once bind_count exceeds this threshold.
    max_bind_count = 2
    item_url_tpl = 'https://ctaxccgp.zcygov.cn/items/{}'
    item_api_url_tpl = 'https://ctaxccgp.zcygov.cn/front/detail/item/{}?timestamp={}'

    dao = SimpleDao()
    # Shared HTTP session used to replay intercepted deal-record requests.
    session = requests.Session()

    def __init__(self, sku_id, total_page, *args, **kwargs):
        """
        :param sku_id: item identifier substituted into ``item_url_tpl``
        :param total_page: number of deal-record pages to walk through
        """
        super(ItemCrawler, self).__init__(*args, **kwargs)
        self.sku_id = sku_id
        self.total_page = total_page
        self.item_url = self.item_url_tpl.format(sku_id)
        # Last successful deal-record API response; replayed for later pages.
        self.data_cache = None

    async def init_context(self):
        """Launch a headful browser with basic anti-automation tweaks and
        register the request/response interceptors."""
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)
        self.browser = await launch(
            headless=False,
            userDataDir=self.tmp_dir,
            devtools=False,
            ignoreDefaultArgs=['--enable-automation']
        )
        self.page = await self.browser.newPage()

        # Fingerprint setup: fixed viewport, random UA, hide navigator.webdriver.
        await self.page.setViewport({"width": 1440, "height": 1080})
        agent = random.choice(agents)
        await self.page.setUserAgent(agent)
        await self.page.evaluate('''() => {Object.defineProperty(navigator, 'webdriver', {get: () => undefined})}''')

        # Register interceptors (interception must be enabled first).
        await self.page.setRequestInterception(True)
        self.page.on("request", self.request_interceptor)
        self.page.on("response", self.response_interceptor)

    async def destroy_context(self):
        """Close page and browser, then delete the temporary profile dir."""
        await self.page.close()
        await self.browser.close()
        # BUG FIX: the original condition was inverted
        # (`if not os.path.exists(...)`) and used os.remove(), which cannot
        # delete a directory.  Remove the profile dir only when it exists.
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir, ignore_errors=True)
        return

    async def run(self):
        """Full crawl cycle: open browser, page through records, tear down."""
        await self.init_context()
        # Open the item detail page and its deal-record tab.
        await self.reload_page()
        # Walk the paginator.
        await self.get_next_page()
        await self.destroy_context()

    async def reload_page(self):
        """(Re)open the item detail page and click into the deal-record tab.

        On any failure the whole browser context is torn down, rebuilt and
        the method retries recursively.
        NOTE(review): there is no retry cap — a permanently broken page
        recurses indefinitely; confirm this is acceptable.
        """
        try:
            await self.page.goto(self.item_url)
            await self.page.waitFor(random.randint(500, 1200))
            await self.page.waitForSelector('div#tab-dealrecord')
            await self.page.click('div#tab-dealrecord')
            await self.page.waitFor(random.randint(1200, 1800))
            # A fresh page resets the HTTP-400 ("bind") counter.
            self.bind_count = 0
        except Exception:
            # Tear down the broken environment ...
            await self.destroy_context()
            await asyncio.sleep(1.2)
            # ... rebuild the browser ...
            await self.init_context()
            # ... and try the detail page again.
            await self.page.goto(self.item_url)
            await asyncio.sleep(1.5)
            await self.reload_page()

    async def check_slider(self):
        """Solve the slider captcha when its wrapper is present and visible."""
        slider = await self.page.querySelector('#nc_nvc_wrapper')
        if slider:
            is_hidden = await self.page.evaluate(
                '()=>{return window.getComputedStyle(document.getElementById("nc_nvc_wrapper")).display === "none";}')
            if not is_hidden:
                await slide_move(self.page)
                await self.page.waitFor(random.randint(1000, 1500))
                # Page-defined helper; logged for debugging only.
                val = await self.page.evaluate('()=>{return getNVCVal();}')
                print('--> {}'.format(val))
                return

    async def get_next_page(self):
        """Type each page number into the paginator input and click "go"."""
        try:
            for page in range(1, self.total_page + 1):
                if self.data_cache:
                    # Refresh the anti-bot token (page-defined getNC()) before
                    # the interceptor replays cached data.
                    await self.page.evaluate('()=>{getNC();}')
                    await asyncio.sleep(random.randint(300, 500) / 1000)

                await self.check_slider()

                # BUG FIX: dropped a stray trailing comma that turned this
                # await into a throwaway one-element tuple expression.
                await self.page.waitForSelector('div.po-pagination input.po-input__inner')
                await self.page.evaluate('document.querySelector("div.po-pagination input.po-input__inner").value=""')
                await self.page.type('div.po-pagination input.po-input__inner', str(page))
                await self.page.click('div.po-pagination button.btn-go')
        except Exception:
            # Any failure: brief pause, rebuild the page, and abandon the
            # remaining pages of this pass.
            await self.page.waitFor(random.randint(500, 1500))
            await self.reload_page()

    async def response_interceptor(self, response: Response):
        """Inspect deal-record API responses and trigger reloads on errors."""
        if "front/detail/item/dealRecord" in response.url:
            url_params = parse_qs(urlparse(response.url).query)
            page_no = url_params['pageNo'][0]
            content = await response.text()
            json_data = json.loads(content)
            if json_data and not json_data.get('success'):
                rs_code = json_data.get('code', '')
                if rs_code == '403':
                    # 403: reload the page immediately.
                    return await self.reload_page()
                elif rs_code == '400':
                    # 400: count occurrences; reload once past the threshold.
                    self.bind_count = self.bind_count + 1
                    if self.bind_count > self.max_bind_count:
                        return await self.reload_page()

                # BUG FIX: log-message typo "Erro[...]" -> "Error[...]".
                print('Error[{}] bind={}, page={} ==> {}'.format(rs_code, self.bind_count, page_no, json_data))

    async def request_interceptor(self, request: Request):
        """Drop noise requests; replay deal-record calls through requests."""
        # Drop requests by resource type.
        if request.resourceType in ['image', 'websocket', 'other']:
            return await request.abort()
        # Drop telemetry pushes by URL.
        if 'galaxy/collect/push' in request.url:
            return await request.abort()

        # Deal-record API call.
        if 'front/detail/item/dealRecord' in request.url:
            try:
                # A cached successful response is served directly.
                if self.data_cache:
                    print('{}'.format(request.headers['aliyunvalidateparam']))
                    resp = {"body": self.data_cache.content, "headers": self.data_cache.headers, "status": self.data_cache.status_code}
                    return await request.respond(resp)

                # No cache yet: perform the request out-of-band and cache it
                # only when the payload reports success.
                response = self.session.request(
                    url=request.url,
                    method=request.method,
                    headers=request.headers,
                    data=request.postData,
                    timeout=30,
                )
                json_data = json.loads(response.text)
                if json_data and json_data.get('success'):
                    self.data_cache = response
                resp = {"body": response.content, "headers": response.headers, "status": response.status_code}
                return await request.respond(resp)
            except Exception as e:
                print(e)
                return await request.abort()
        else:
            # Everything else passes through untouched.
            return await request.continue_()


if __name__ == '__main__':
    # Crawl 48 deal-record pages for SKU 18636828.
    crawler = ItemCrawler('18636828', 48)
    event_loop = asyncio.get_event_loop()
    event_loop.run_until_complete(crawler.run())
